
/*This do file prepares the Summary of Deposits (SOD) data for the bank health (BH) data calculations. 
The SUMD table is a 'database' at the National Information Center (NIC) containing SOD data back to 1975.
However, it stopped being updated after 2014, and thus, an alternate table, SUMS, had to be used to continue
the time series through to current data. 

** At the time of the project, the SUMS data only contained data back to 2005.
It is now updated with data back to 1994 - (same as publicly available FDIC source mentioned
in the master do file and bhreadme.docx) - We used the entirety of what was available through the SUMS table
and used the SUMD to carry the SOD time series back to 1984. 

See master do file and bhreadme.docx for detail.
*/

local raw2020_dir <place your filepaths here>

use RawSOD_SUMDTable,clear //pre-2005 SOD data is from NIC Table: SUMD



/**************************************/
*Work with the pre-2005 SOD data first. 

*NOTE: See master do file / bhdocumentation.xlsx (tab 'MapVarNames-Btwn-SOD-Sources')
* for information on how one would go 
* about mapping the variable naming conventions between the FDIC and the NIC SOD data, as well as 
*between SUMD variable names and SUMS variable names.
*Where possible we have tried to note the correspondence between the naming conventions 


*KEEP RELEVANT YEARS
	gen 	year=year(d_dt)
	keep if year<=2004 
	drop if year<1984



*LABEL VARIABLES
	*These variables change prefix AND mnemonics relative to the SUMS data below:
	la var	sumdg959		"Filed thrift financial report ( eq 1) or filed Call Report/SOD report (eq 0)"
	la var	sumh2200		"Total HQ deposits"

		keep if sumdg959==0					//Keep call report filers (remove banks that filed the Thrift Financial Report - analog to "sumsfe13" below)
		drop if sumh2200==0 | sumh2200==.	//drop where HQ deposits missing or zero, mirrored below w/ sums data using variable 'sumhfe21'		


	*These variables ONLY change prefix (sumd to sums) relative to the SUMS data below:
	la var	year		"Year of Summary of Deposits filing, as of June"
	la var	id_rssd		"Bank identifer"
	la var	sumdg967	"Unique branch number assigned by FDIC"

	la var	sumd2200	"Branch deposits"

	la var	sumd9200	"Alphabetic code of state in which branch is physically located"
	la var	sumd9210	"FIPS state code - branch"
	la var	sumd9150	"FIPS county code - branch"


*RENAME VARIABLES
	rename 	id_rssd  rssdid
	rename	sumdg967 off_num 	//office ids aren't important other than to uniquely identify branches WITHIN AN INSTITUTION. 
	rename 	sumd2200 depsumbr
	rename 	sumd9200 stalpbr


*Drop if no branch deposits 
drop if depsumbr==0 	| depsumbr==.  			


*gen string location codes
gen str2 fips_state			=  string(sumd9210,"%02.0f")
gen str3 fips_county		=  string(sumd9150,"%03.0f")
gen str5 FIPS 				=  fips_state + fips_county 


sort year rssdid off_num depsumbr
	
	
local SOD_KEEP_VARS "year rssdid off_num depsumbr stalpbr FIPS"
keep	`SOD_KEEP_VARS'
order  	`SOD_KEEP_VARS'


* SAVE 1984 to 2004 cleaned and subsetted SOD data

		*########################################
		tempfile SUMD_84_04_var_rename
		save 	`SUMD_84_04_var_rename',replace  	
		*########################################

*-------------------------------------------------------------------------------

* SETUP THE 2005--2017 SOD DATA
use RawSOD_SUMSTable,clear //post-2005 SOD data is from National Information Center Table: SUMS


* KEEP RELEVANT YEARS
gen 	year=year(d_dt)
keep if year>=2005 


*LABEL VARIABLES
	*These variables change prefix and mnemonics relative to the SUMD data above
	la var	sumsfe13	"Filed thrift financial report ( eq TFR) or filed SOD/CALL report (eq CALL)"
	la var	sumhfe21	"Total HQ deposits"

	
	
			//sumsfe13: remove banks that filed the Thrift Financial Report - analog to "sumdg959" above
			keep if sumsfe13=="CALL"	 		//remove banks that filed the Thrift Financial Report(TFR)  - analog to "sumdg959" above, and 'CALL' in FDIC data
			drop if sumhfe21==0 | sumhfe21==.  	//drop where HQ deposits missing or zero, mirrored above w/ sums data using variable 'sumh2200'	
	
	
	
	*These variables ONLY change prefix (sums to sumd) relative to the SUMD data above:
	la var	year		"Year of Summary of Deposits filing, as of June"
	la var	id_rssd		"Bank identifer"
	la var	sums2200	"Branch deposits"

	la var	sums9200	"Alphabetic code of state in which brn is physically located"
	la var	sums9210	"FIPS state code - branch"
	la var	sums9150	"FIPS county code - branch"
	la var	sumsg967	"Unique branch number assigned by FDIC"


*RENAME VARIABLES	
	rename 	id_rssd  rssdid
	rename 	sums2200 depsumbr
	rename 	sums9200 stalpbr
	rename	sumsg967 off_num  //just unique branch identifier - never used to identify a branch independent of its hq bank, i.e., only used as a within-bank identifier.
 
 
*Drop if no branch deposits
drop if depsumbr==0 	| depsumbr==.  			


*gen string location codes
gen str2 fips_state			=  string(sums9210,"%02.0f")
gen str3 fips_county		=  string(sums9150,"%03.0f")
gen str5 FIPS 				=  fips_state + fips_county


sort year rssdid off_num depsumbr	
 
 
keep	`SOD_KEEP_VARS'
order  	`SOD_KEEP_VARS'


*APPEND THE EARLIER YEARS

	*|||||||||||||||||||||||||||||
	append using `SUMD_84_04_var_rename'
	*|||||||||||||||||||||||||||||

				
		
*take out territories
local drop_st "NI FM PR VI AS GU MH MP PW"
foreach x of local drop_st {
	drop if stalpbr=="`x'"
}

		
********************************************************************************
preserve

/*	In this step, we use the 2013 MSA Delineation File from the census  
	as a crosswalk to assign county-based SOD locations (FIPS codes) to 2013 vintage MSAs.
	(The census file can be obtained here:https://www2.census.gov/programs-surveys/metro-micro/geographies/reference-files/2013/delineation-files/list1.xls)
	
*/


	*Matching SOD FIPS codes to MSA vintage 2013 county delineations 
	import excel using "`raw2020_dir'/list1.xls", firstrow cellrange("A3:L1888") clear

	
	*A few assert statements to ensure that same data is read in for you
	assert strpos(CBSACode[1885],"March 2013")
	drop in 1883/1885
	assert CBSATitle[1882]=="Zapata, TX"

	gen year=2013
	gen mth=02 //census website says these codes were released in February of that year

	rename MetropolitanMicropolitanStatis LevelofCBSA
	rename CountyCountyEquivalent ComponentName
	rename StateName State

	gen str5 FIPS=FIPSStateCode+FIPSCountyCode

	
	destring CBSACode,gen(msabr)  //msabr - msa-of-'branch' - 
	//Note: for those who might use the FDIC public data - this 'msabr' variable does not equal the 'msabr' variable present in the FDIC data.

	keep FIPS msabr CBSACode LevelofCBSA ComponentName State
	drop if State=="Puerto Rico"
	
	duplicates tag FIPS,gen(tag)
	assert tag==0
	drop tag
	
			*#################################
			tempfile bh_01d_FIPS_MSA13
			save 	`bh_01d_FIPS_MSA13',replace
			*#################################

restore
********************************************************************************

* MERGE the relevant geographic data back into the full dataset

	*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
	merge m:1 FIPS using `bh_01d_FIPS_MSA13' //keep _merge for classifying geocodes
	*<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

* drop unmatched observations from the using (geographic dataset) as we only care about having location data for our
* original SOD dataset (drops nonmatches from census file). 
	
drop if _merge==2  	


/*********See bhreadme.docx for detail on the handling of banks not in an MSA.*************/


replace msabr=0 if msabr==. & _merge==1   	/*unmatched branch-level deposits in unmatched counties get sent to rural*/


gen fips_no_msa_sent_to_rural=FIPS 	if msabr==0 & _merge==1			//optional: keep unmatched county codes sent to 'rural' for reference


drop _merge State CBSACode FIPS ComponentName



/*Here, we check to see if we are missing any PSID msas.
	Note: This cannot be done without access to the restricted PSID data.*/
	
gen CBSA = msabr

cd "`raw2020_dir'"

	*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
	merge m:1 CBSA using "listofCBSAInPSID"
	*<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

	
drop if _merge==2   		//the 2 locations that don't match are 9999/9998, which are PSID unknown/rural codes

gen msa_not_in_psid = msabr 	if _merge==1 & msabr!=0				
la var msa_not_in_psid "Flag MSAs not in PSID that have valid deposits"

replace msabr=0 				if _merge==1 & msabr!=0		//sending to rural if not in psid
drop _merge


cd "`out2020_dir'"


*SAVE SOD data with the relevant MSA delineations. 

		*#################################
		save "`out2020_dir'/bh-01c-SOD-wGeocodes",replace
		*#################################	
	
